#|__________________________________________________________________________
 |
 | datatype.lsp
 | Copyright (c) 1991-2001 by Forrest W. Young
 | Contains methods to determine datatype

 Use (datatype?) to determine datatype of current data.
 

 The full set of possible datatypes, etc, are given by
    (possible-data-types) 
    (possible-data-extensions) 
    (possible-data-abbreviations)

 To see if data satisfy a particular datatype, send a message of the form 
    (send $ :datatype?) 
 where datatype is replaced by the datatype name for each datatype. E.g.:
    (send $ :freq?)
    (send $ :crosstabs?)
    etc.

 For a help message about the datatype aspects of the current dataobject, or of $, use
    (report-datatype) or
    (send $ :report-datatype) 

NOTE:
The (send $ :generalized-datatype) message applies the criteria
shown in the table below in order shown. These are not mutually
exclusive, though they are exhaustive. The generalized data-type
is the first one that is satisfied.

Criteria for generalized data-type (applied in order presented) are:
 1) mis MISSING       >0 missing elements
 2) mat MATRIX        >0 matrices
 3) cat CATEGORY      >0 category variables, =0 numeric variables, freq=t/nil
 5) fcl FREQCLASS     >0 category variables, =1 numeric variables, freq=t     
 6) cls CLASS         >0 category variables, =1 numeric variables, freq=nil
 7) ctb CROSSTABS     >0 category variables, >1 numeric variables, freq=t
 8) uni UNIVARIATE    =0 category variables, =1 numeric variables, freq=t/nil
 9) ftb FREQ TABLE    =0 category variables, >1 numeric variables, freq=t     
10) biv BIVARIATE     =0 category variables, =2 numeric variables, freq=t/nil
11) mlv MULTIVARIATE  =0 category variables, >2 numeric variables, freq=nil
12) gen GENERAL       none of the above
NOTES:
 a) Generalized datatype is the first one where the criteria are satisfied
 b) TABLE is no longer used.
 c) ORDINAL variables are treated as NUMERIC (change in the future).
 d) Defined on ACTIVE varibles, NOT ALL variables 

                              GENERALIZED DATATYPE
Number of  |       |                  
Category   | freq? |        Number of Numeric Variables 
Variables  |       |  0        1          2         >2 
    0      | not-f |  error    univariate bivariate multivariate
           |   f   |  error    freq       freq      freq
   >0      | not-f |  category class      general   general
           |   f   |  category freqclass  crosstabs crosstabs

NOTE WELL: 
The older DATATYPE message (send $ :datatype) should no longer be used.
It is based on the criteria specified below for generalized-data-type, 
but only for data-types MISSING, MATRIX, CATEGORY, CLASS and MULTIVARIATE, 
with multivariate being the wastebasket case.


 |___________________________________________________________________________
 |#

(defun datatype? (&optional (dob $) (ignore-missing nil) (ignore-new nil))
"Arg: (&optional (dataobject $))
Determines generalized-datatype of dataobject (current dataobject by default). Returns list of three strings: datatype, datatype abbreviation, datatype extension. Uses generalized-datatype function.
Aliased functions: (generalized-datatype) (data-type?)"
  (let ((return (generalized-datatype 
                 (send dob :active-types '(all)) 
                 (send dob :freq)
                 (if ignore-new nil (send dob :new-data?))
                 (if ignore-missing nil (send dob :missing-data?))
                 (send dob :matrix-data?))))
    (list (first return) (data-type-abbreviation (first return)) (second return))))

(defun generalized-datatype? (&rest args) (apply #'datatype? args))
(defun data-type? (&rest args) (apply #'datatype? args))

(defun generalized-datatype (types freq new missing matrix)
"Args: types freq new missing matrix
   Where types is a list of variables types, and freq, new, missing, and matrix are logical variables indicating the special datatype cases.
   Aliased functions - datatype and data-type.
   Determines data-type from a list of variable types, whether the data are frequencies, and whether it is new missing or matrix. Does not need a dataobject. Returns a list of two strings, the first is the data-type, the second the data-type extension abbreviation. Datatype strings are one of the following strings
\"missing\" \"matrix\" \"new\" \"category\" \"freq\" \"freqclass\" \"class\" \"crosstabs\" \"univariate\" \"bivariate\" \"multivariate\" \"general\"
Data-type extension abbreviations are one of these three-character strings \"mis\" \"mat\" \"new\" \"cat\" \"frq\" \"fcl\" \"cls\" \"xtb\" \"uni\" \"biv\" \"mlt\" \"gen\""
  (flet ((equal-str (x y)
                    (equal (string-downcase x) (string-downcase y)))
         )
    (let* ((nn (length (remove-if-not #'(lambda (x) (equal-str x "Numeric" )) types)))
           (nc (length (remove-if-not #'(lambda (x) (equal-str x "Category")) types)))
           (no (length (remove-if-not #'(lambda (x) (equal-str x "Ordinal" )) types)))
           (n  (+ nn no))
           )
      (cond
        ((= 0 (+ nn nc no))(error "there are no data"))
        (missing (list "missing" "mis"))
        (matrix  (list "matrix" "mat"))
        (new (list "new" "new"))
        ((> nc 0)
         (cond
           ((= n 0) (list "category" "cat"))
           ((= n 1) (if freq (list "freqclass" "fcl") (list "class" "cls")))
           (t       (if freq (list "crosstabs" "xtb") (list "general" "gen")))))
        ((= nc 0)
         (cond
           ((= n 1) (if freq (list "freq" "frq") (list "univariate" "uni")))
           ((= n 2) (if freq (list "freq" "frq") (list "bivariate" "biv")))
           ((> n 2) (if freq (list "freq" "frq") (list "multivariate" "mlt")))))
        (t (error "unidentifiable datatype"))))))

(defun data-type (&rest args) 
"Args: types freq new missing matrix
   Where types is a list of variables types, and freq, new, missing, and matrix are logical variables indicating the special datatype cases.
   Aliased functions - generalized-datatype and datatype.
   Determines data-type from a list of variable types, whether the data are frequencies, and whether it is new missing or matrix. Does not need a dataobject. Returns a list of two strings, the first is the data-type, the second the data-type extension abbreviation. Datatype strings are one of the following strings: \"missing\" \"matrix\" \"new\" \"category\" \"freq\" \"freqclass\" \"class\" \"crosstabs\" \"univariate\" \"bivariate\" \"multivariate\" \"general\"
Data-type extension abbreviations are one of these three-character strings \"mis\" \"mat\" \"new\" \"cat\" \"frq\" \"fcl\" \"cls\" \"xtb\" \"uni\" \"biv\" \"mlt\" \"gen\""
  (apply #'generalized-datatype args))

(defun datatype (&rest args)
"Args: types freq new missing matrix
   Where types is a list of variables types, and freq, new, missing, and matrix are logical variables indicating the special datatype cases.
   Aliased functions - datatype and generalized-datatype.
   Determines data-type from a list of variable types, whether the data are frequencies, and whether it is new missing or matrix. Does not need a dataobject. Returns a list of two strings, the first is the data-type, the second the data-type extension abbreviation. Datatype strings are one of the following strings
\"missing\" \"matrix\" \"new\" \"category\" \"freq\" \"freqclass\" \"class\" \"crosstabs\" \"univariate\" \"bivariate\" \"multivariate\" \"general\"
Data-type extension abbreviations are one of these three-character strings \"mis\" \"mat\" \"new\" \"cat\" \"frq\" \"fcl\" \"cls\" \"xtb\" \"uni\" \"biv\" \"mlt\" \"gen\""
  (apply #'generalized-datatype args))

(defmeth mv-data-object-proto :generalized-data-type ()
"Alias for :determine-data-type"
  (send self :determine-data-type))

(defmeth mv-data-object-proto :generalized-datatype ()
"Alias for :determine-data-type"
  (send self :determine-data-type))

(defmeth mv-data-object-proto :crosstabs-data? ()
"Crosstabs data are frequency data with 1 or more category variables and 2 or more numeric (frequency) variables"
  (flet ((equal-str (x y)
           (equal (string-downcase x) (string-downcase y)))
         )
    (if (send self :freq-data?)
        (let* ((types (send self :active-types '(all)))
               (nn (length (remove-if-not #'(lambda (x) (equal-str x "Numeric" )) types)))
               (nc (length (remove-if-not #'(lambda (x) (equal-str x "Category")) types)))
               (no (length (remove-if-not #'(lambda (x) (equal-str x "Ordinal" )) types)))
               )
          (and (>= nn 2) (>= nc 1))))))

(defmeth mv-data-object-proto :category-data? ()
"Category data only have category variables"
  (let ((unique-var-types
         (remove-duplicates 
          (mapcar #'string-downcase 
                  (send self :active-types '(all))) :test #'equal)))
    (and (= 1 (length unique-var-types))
         (equal "category" (string-downcase (first unique-var-types))))))

  
(defmeth mv-data-object-proto :classification-data? ()
"Classification data have 1 numeric variable and 1 or more category variables (and no ordinal variables."
  (flet ((equal-str (x y)
           (equal (string-downcase x) (string-downcase y)))
         )
    (let* ((types (send self :active-types '(all)))
           (nn (length (remove-if-not #'(lambda (x) (equal-str x "Numeric" )) types)))
           (nc (length (remove-if-not #'(lambda (x) (equal-str x "Category")) types)))
           (no (length (remove-if-not #'(lambda (x) (equal-str x "Ordinal" )) types)))
           )
      (and (= nn 1) (>= nc 1) (= no 0)))))

(defmeth mv-data-object-proto :missing-data? ()
"Returns T if any element of data is NIL, NIL otherwise."
  (send *watcher* :write-text "Checking for missing values" :show t)
  (let ((result (not (not (position nil (send self :data))))))
    (when (not result)
          (setf result (not (not (position " " (send self :data) :test #'equal))))
          (when (not result)
                (setf result (not (not (position "" (send self :data) :test #'equal)))))
          )
    (send *watcher* :close)
    result))

(defmeth mv-data-object-proto :freqclass-data? ()
"Only tests for mv data with 1 numeric var named freq... and > 0 category vars"
  (let ((nn (length (send self :active-variables '(numeric))))
        (no (length (send self :active-variables '(ordinal))))
        (nc (length (send self :active-variables '(category))))
        (freq?)(varname) (freq-list) (choice)
        )
    (when (and (= nn 1) (>= nc 1) (= no 0))
          (setf varname (string-downcase
                         (first (send self :active-variables '(numeric)))))
          (when (and (> (length varname) 3) (equal "freq" (subseq varname 0 4)))
                (setf freq? t)
                (setf freq-list (send self :variable varname))
                (when (which (mapcar #'(lambda (ele) (not (integerp ele))) freq-list))
                      (send *workmap* :postpone-redraw nil)
                      (setf choice (choose-item-dialog (format nil 
"ROUNDING YOUR DATA:~%These frequency data must be rounded~%because frequencies must be integers.") '("Round to NEAREST integer (round)" "Round DOWN to preceeding integer (floor)" "Round UP to next integer (ceiling)" )))
                      (cond 
                        ((= choice 0)
                         (send self :variable varname
                               (round   (send self :variable varname))))
                        ((= choice 1) (send self :variable varname
                                           (floor   (send self :variable varname))))
                        ((= choice 2) (send self :variable varname
                                           (ceiling (send self :variable varname))))))
                
                freq?))))


;;;;;fwy 2001-03-26 totally new method

(defmeth mv-data-object-proto :freq? (&optional active-only)
  (if (send self :freq) (send self :freq)
      (let ((types (send self :active-types '(all)))
            (nn) (no) (nc) (freq?)(varname))
        (cond
          (active-only
           (setf nn (length (send self :active-variables '(numeric))))      
           (setf no (length (send self :active-variables '(ordinal))))
           (setf nc (length (send self :active-variables '(category)))))
          (t
           (setf nn (length (remove-if-not #'(lambda (x) (equal x "Numeric" )) types)))
           (setf nc (length (remove-if-not #'(lambda (x) (equal x "Category")) types)))
           (setf no (length (remove-if-not #'(lambda (x) (equal x "Ordinal" )) types)))))
        (when (and (= nn 1) (>= nc 1) (= no 0))
              (setf varname (string-downcase
                             (first (send self :active-variables '(numeric)))))
              (when (and (> (length varname) 3) (equal "freq" (subseq varname 0 4)))
                    (setf freq? t)))
        freq?)))


(defmeth mv-data-object-proto :freq-data? (&rest args)
  (apply #'send self :freq? args))

;;;;;;pv 240101: this method did not exist - 

(defmeth mv-data-object-proto :matrix-data? ()
"Returns T if the data have matrices, else nil"
  (if (send self :matrices) t nil))

(defmeth mv-data-object-proto :multivariate-data? ()
"Multivariate data only have numeric/ordinal variables"
  (let ((unique-var-types
         (remove-duplicates 
          (mapcar #'string-downcase 
                  (send self :active-types '(all))) :test #'equal)))
    (not (member "category" unique-var-types :test #'equal))))

(defmeth mv-data-object-proto :bivariate-data? ()
"Bivariate data exactly two numeric/ordinal variables"
  (and (= 2 (length (send self :active-types '(all))))
       (send self :multivariate-data?)))

(defmeth mv-data-object-proto :univariate-data? ()
"Bivariate data exactly two numeric/ordinal variables"
  (and (= 1 (length (send self :active-types '(all))))
       (send self :multivariate-data?)))

(defmeth mv-data-object-proto :tour-data? ()
"Tour data have six or more numeric variables"
  (> (length (send self :active-types '(numeric))) 5))

(defun report-datatypes ()
  (send $ :report-datatypes))

(defmeth mv-data-object-proto :general-data? ()
"General data is data which are none of the other datatypes."
  (and (not (send self :matrix-data?))
       (not (send self :missing-data?))
       (not (send self :crosstabs-data?))
       (not (send self :multivariate-data?))
       (not (send self :bivariate-data?))
       (not (send self :univariate-data?))
       (not (send self :classification-data?))
       (not (send self :category-data?))
       (not (send self :freqclass-data?))))

(setf *report-datatypes* nil)

;;;;;;fwy 2001-03-26 totally new method

(defmeth mv-data-object-proto :determine-data-type ()
"Args- none
Determines the generalized-data-type of a dataobject and returns its value. Also determines the data-type and sets the data-type slot-value. 
  Data-Type is based on the criteria specified below for generalized-data-type, but only for data-types MISSING, MATRIX, CATEGORY, CLASS and MULTIVARIATE.
  Criteria for generalized data-type (applied in order presented) are:
 0) NEW           new-data slot is T
 1) MISSING       >0 missing elements
 2) MATRIX        >0 matrices
 3) CATEGORY      >0 category variables, =0 numeric variables, freq=t/nil
 5) FREQCLASS     >0 category variables, =1 numeric variables, freq=t     
 6) CLASS         >0 category variables, =1 numeric variables, freq=nil
 7) CROSSTABS     >0 category variables, >1 numeric variables, freq=t
 8) UNIVARIATE    =0 category variables, =1 numeric variables, freq=t/nil
 9) FREQ          =0 category variables, >1 numeric variables, freq=t     
10) BIVARIATE     =0 category variables, =2 numeric variables, freq=t/nil
11) MULTIVARIATE  =0 category variables, >2 numeric variables, freq=nil
12) GENERAL       none of the above

NOTES:
 a) Generalized datatype is the first one where the criteria are satisfied
 b) TABLE is no longer used.
 c) ORDINAL variables are treated as NUMERIC (change in the future).
 d) Defined on ACTIVE varibles, NOT ALL variables 

                              GENERALIZED DATATYPE
Number of  |       |                  
Category   | freq? |        Number of Numeric Variables 
Variables  |       |  0        1          2         >2 
    0      | not-f |  error    univariate bivariate multivariate
           |   f   |  error    freq       freq      freq
   >0      | not-f |  category class      general   general
           |   f   |  category freqclass  crosstabs crosstabs

                                    DATATYPE
Number of  |       |                  
Category   | freq? |        Number of Numeric Variables 
Variables  |       |  0        1            2            >2 
    0      | not-f |  error    multivariate multivariate multivariate
           |   f   |  error    freq         freq         freq
   >0      | not-f |  category class        multivariate multivariate
           |   f   |  category class        multivariate multivariate"

  (let ((nn (length (send self :active-variables '(numeric)))) 
        (no (length (send self :active-variables '(ordinal))))  
        (nc (length (send self :active-variables '(category))))
        (freq? (send self :freq?))
        (data-type))
    (cond
      ;0 NEW
      ((send self :new-data?)
       (setf data-type "new")
       (send self :data-type-abbrev "new")
       (send self :data-type "new"))
      ;1 MISSING
      ((send self :missing-data?)                         
       (setf data-type (send self :real-data-type))
       (send self :data-type-abbrev "mis")
       (send self :data-type "missing"));fwy changed from "new" 
      ;2 MATRIX
      ((send self :matrix-data?)                          
       (setf data-type "matrix")
       (send self :data-type-abbrev "mat")
       (send self :data-type "matrix"))
      ;RECTANGULAR
      (t
       (cond 
        ;NO CATEGORY VARIABLES
         ((= nc 0)
          (when (= (+ nn no) 0)
                (error "; :determine-data-type makes impossible branch (no data)"))
          (if freq?
              (progn
               (send self :data-type "freq")
               (send self :data-type-abbrev "frq")
               (setf data-type "freq"))
              (case (+ nn no)
                (1 
                 (send self :freq nil)
                 (send self :data-type "multivariate")
                 (send self :data-type-abbrev "uni")
                 (setf data-type "univariate"))
                (2 
                 (send self :freq nil)
                 (send self :data-type "multivariate")
                 (send self :data-type-abbrev "biv")
                 (setf data-type "bivariate"))
                (t 
                 (send self :freq nil)
                 (send self :data-type "multivariate")
                 (send self :data-type-abbrev "mlt")
                 (setf data-type "multivariate")))))
         ;ONE OR MORE CATEGORY VARIABLES
         (t
          (if freq?
              (case (+ nn no)
                ; CATEGORY + FREQ
                (0 
                 (send self :freq nil);force all category data to be non-freq
                 (send self :data-type "category")
                 (send self :data-type-abbrev "cat")
                 (setf data-type "category")
                 )
                ; CLASS + FREQ
                (1
                 (send self :freq t)
                 (send self :data-type "freqclass")  ;fwy changed from class feb 2002 but wanted to
                 (setf data-type "freqclass"))       ;fwy changed from class feb 2002 but wanted to
                (send self :data-type-abbrev "fcl")
                ; MULTIVARIATE + FREQ
                (t
                 (send self :freq t)
                 (send self :data-type "multivariate")
                 (setf data-type "crosstabs")        ;was multivariate fwy 20010326
                 (send self :data-type-abbrev "xtb")))
              (case (+ nn no)
                ; CATEGORY
                (0 
                 (send self :freq nil)
                 (send self :data-type "category")
                 (setf data-type "category")
                 (send self :data-type-abbrev "cat")
                 )
                ; CLASS
                (1
                 (send self :freq nil)
                 (send self :data-type "class")
                 (setf data-type "class")
                 (send self :data-type-abbrev "cls")
                 )
                (t
                 (send self :freq nil)
                 (send self :data-type "multivariate");both were crosstabs fwy 20010418 
                 (setf data-type "general")           ;both were multivariate fwy 20010326
                 (send self :data-type-abbrev "gen")
                 )))
          ))))
    (when *report-datatypes* (report-datatypes))
    data-type))


  
(defun report-datatypes ()
  (send $ :current-datatype))

(defmeth mv-data-object-proto :general-data? ()
"General data is data which are none of the other datatypes."
  (and (not (send self :matrix-data?))
       (not (send self :missing-data?))
       (not (send self :crosstabs-data?))
       (not (send self :multivariate-data?))
       (not (send self :bivariate-data?))
       (not (send self :univariate-data?))
       (not (send self :classification-data?))
       (not (send self :category-data?))
       (not (send self :freqclass-data?))))


(defun possible-datatypes ()
  (list  "missing" "matrix" "new" "category" "freq" "freqclass" "class" "crosstabs" "univariate" "bivariate" "multivariate" "general"))

(defun possible-data-types ()
  (list  "missing" "matrix" "new" "category" "freq" "freqclass" "class" "crosstabs" "univariate" "bivariate" "multivariate" "general"))

(defun possible-data-extensions ()
  (list "mis" "mat" "new" "cat" "frq" "fcl" "cls" "xtb" "uni" "biv" "mlt" "gen"))

(defun possible-data-abbreviations ()
  (list "Missing" "Matrix" "New" "Categ" "Frqncy" "FrqCls" "Class" "CrsTab" "UniVar" "BiVar" "MulVar" "Genral"))
  

(defmeth mv-data-object-proto :new-data? ()
  (send self :new-data))
(defmeth mv-data-object-proto :report-datatypes ()
  (current-datatype self)
  (setf *report-datatypes* t))

(defun current-datatype (&optional (cd $))
  (setf *report-datatypes* nil)
  (let ((cdname (string-upcase (send cd :name))))
  (help (strcat
         (format nil "CURRENT DATA IS: ~A " cdname)
         (format nil "~2%The possible datatypes are:~%~a~%" (possible-data-types))
         (format nil "~%The (generalized) datatype of ~a is ~s~%"
                 cdname (send cd :generalized-data-type))
         (format nil "~%The following table gives the 12 recognized datatypes with their recognized abbreviations, extensions, and fullnames.  In addition, the datatype abbreviations may be \"EnAbld\" \"DisAbld\" \"ReEnAbld\"~%") 
         (format nil "    New      new   new           ~a~%"(if (send cd :new-data?) "yes" "no"))
         (format nil "    Missing  mis   missing       ~a~%"(if (send cd :missing-data?) "yes" "no"))
         (format nil "    Matrix   mat   matrix        ~a~%"(if (send cd :matrix-data?) "yes" "no"))
         (format nil "    Categ    cat   category      ~a~%"(if (send cd :category-data?) "yes" "no"))
         (format nil "    UniVar   uni   univariate    ~a~%"(if (send cd :univariate-data?) "yes" "no"))
         (format nil "    BiVar    biv   bivariate     ~a~%"(if (send cd :bivariate-data?) "yes" "no"))
         (format nil "    MulVar   mlt   multivariate  ~a~%"(if (send cd :multivariate-data?) "yes" "no"))
         (format nil "    FrqCls   fcl   freqclass     ~a~%"(if (send cd :freqclass-data?) "yes" "no"))
         (format nil "    Class    cls   class         ~a~%"(if (send cd :classification-data?) "yes" "no"))
         (format nil "    Frqncy   frq   freq          ~a~%"(if (send cd :freq-data?) "yes" "no"))
         (format nil "    CrsTab   xtb   crosstabs     ~a~%"(if (send cd :crosstabs-data?) "yes" "no"))
         (format nil "    Genral   gen   general       ~a~%"(if (send cd :general-data?) "yes" "no"))
         (format nil "The (generalized) datatype of ~a is the first \"yes\" in the table above.~%" cdname)
         (format nil "~%The datatype tests for ~a are:~%" cdname)
         (format nil "(send ~a :freq-data?)            ~a~%" cdname (send cd :freq-data?))
         (format nil "(send ~a :freq)                  ~a~%" cdname (send cd :freq))
         (format nil "(send ~a :data-type)             ~a~%" cdname (send cd :data-type))
         (format nil "(send ~a :generalized-data-type) ~a~%" cdname (send cd :generalized-data-type))
         (format nil "~%The generalized-datatype function (shown below), as applied to ~a returns ~s. This function is the most efficient way of determining datatype.~%" cdname
                 (generalized-datatype (send $ :active-types '(all)) 
                            (send $ :freq)
                            (send $ :new-data?)
                            (send $ :missing-data?)
                            (send $ :matrix-data?)))
         (format nil "   (generalized-datatype~%     (send ~a :active-types '(all))~%     (send ~a :freq)~%     (send ~a :new)~%     (send ~a :missing)~%     (send ~a :matrices))~%" $ $ $ $ $)
         (format nil "~%Notes:~%Datatype is based ONLY on ACTIVE variables.~%ORDINAL variables are treated as NUMERIC.~%TABLE data are no longer recognized.~%")))
  (SEND *HELP-WINDOW* :TITLE "CURRENT DATATYPE REPORT")
  ))

(defmeth mv-data-object-proto :datashape? ()
  (datashape? self))

(defun datashape? (&optional (dob $))
  (unless (not dob)
          (let* ((type (string-downcase (first (datatype? dob)))) (L) (M))
            (cond
              ((equal type "matrix")
               (setf L (length (member "asymmetric" 
                                       (map-elements #'string-downcase (send dob :shapes))
                                       :test #'equal)))
               (setf M (send dob :nmat))
               (cond ((= L 0) "Symmetric") ((= L M) "Asymmetric") (t "Square")))
              (t "Rectangular")))))
 